{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 随机森林分类模型\n",
    "> 基于内置数据集：鸢尾花数据集进行随机森林的数据预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn import preprocessing\n",
    "from sklearn.datasets import load_iris"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.加载数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['DESCR',\n",
       " 'data',\n",
       " 'feature_names',\n",
       " 'filename',\n",
       " 'frame',\n",
       " 'target',\n",
       " 'target_names']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "iris_data = load_iris()\n",
    "dir(iris_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "特征名称==> ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']\n",
      "目标名称==> ['setosa' 'versicolor' 'virginica']\n"
     ]
    }
   ],
   "source": [
    "feature_names = iris_data.feature_names\n",
    "target_names = iris_data.target_names\n",
    "print(\"特征名称==>\", feature_names)\n",
    "print(\"目标名称==>\", target_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[5.1, 3.5, 1.4, 0.2],\n",
       "       [4.9, 3. , 1.4, 0.2]])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_data = iris_data.data\n",
    "feature_data[0:2, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
       "       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,\n",
       "       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_data = iris_data.target\n",
    "target_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 2.查看随机森林回归模型的帮助文档"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Help on class RandomForestClassifier in module sklearn.ensemble._forest:\n",
      "\n",
      "class RandomForestClassifier(ForestClassifier)\n",
      " |  RandomForestClassifier(n_estimators=100, *, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None)\n",
      " |  \n",
      " |  A random forest classifier.\n",
      " |  \n",
      " |  A random forest is a meta estimator that fits a number of decision tree\n",
      " |  classifiers on various sub-samples of the dataset and uses averaging to\n",
      " |  improve the predictive accuracy and control over-fitting.\n",
      " |  The sub-sample size is controlled with the `max_samples` parameter if\n",
      " |  `bootstrap=True` (default), otherwise the whole dataset is used to build\n",
      " |  each tree.\n",
      " |  \n",
      " |  Read more in the :ref:`User Guide <forest>`.\n",
      " |  \n",
      " |  Parameters\n",
      " |  ----------\n",
      " |  n_estimators : int, default=100\n",
      " |      The number of trees in the forest.\n",
      " |  \n",
      " |      .. versionchanged:: 0.22\n",
      " |         The default value of ``n_estimators`` changed from 10 to 100\n",
      " |         in 0.22.\n",
      " |  \n",
      " |  criterion : {\"gini\", \"entropy\"}, default=\"gini\"\n",
      " |      The function to measure the quality of a split. Supported criteria are\n",
      " |      \"gini\" for the Gini impurity and \"entropy\" for the information gain.\n",
      " |      Note: this parameter is tree-specific.\n",
      " |  \n",
      " |  max_depth : int, default=None\n",
      " |      The maximum depth of the tree. If None, then nodes are expanded until\n",
      " |      all leaves are pure or until all leaves contain less than\n",
      " |      min_samples_split samples.\n",
      " |  \n",
      " |  min_samples_split : int or float, default=2\n",
      " |      The minimum number of samples required to split an internal node:\n",
      " |  \n",
      " |      - If int, then consider `min_samples_split` as the minimum number.\n",
      " |      - If float, then `min_samples_split` is a fraction and\n",
      " |        `ceil(min_samples_split * n_samples)` are the minimum\n",
      " |        number of samples for each split.\n",
      " |  \n",
      " |      .. versionchanged:: 0.18\n",
      " |         Added float values for fractions.\n",
      " |  \n",
      " |  min_samples_leaf : int or float, default=1\n",
      " |      The minimum number of samples required to be at a leaf node.\n",
      " |      A split point at any depth will only be considered if it leaves at\n",
      " |      least ``min_samples_leaf`` training samples in each of the left and\n",
      " |      right branches.  This may have the effect of smoothing the model,\n",
      " |      especially in regression.\n",
      " |  \n",
      " |      - If int, then consider `min_samples_leaf` as the minimum number.\n",
      " |      - If float, then `min_samples_leaf` is a fraction and\n",
      " |        `ceil(min_samples_leaf * n_samples)` are the minimum\n",
      " |        number of samples for each node.\n",
      " |  \n",
      " |      .. versionchanged:: 0.18\n",
      " |         Added float values for fractions.\n",
      " |  \n",
      " |  min_weight_fraction_leaf : float, default=0.0\n",
      " |      The minimum weighted fraction of the sum total of weights (of all\n",
      " |      the input samples) required to be at a leaf node. Samples have\n",
      " |      equal weight when sample_weight is not provided.\n",
      " |  \n",
      " |  max_features : {\"auto\", \"sqrt\", \"log2\"}, int or float, default=\"auto\"\n",
      " |      The number of features to consider when looking for the best split:\n",
      " |  \n",
      " |      - If int, then consider `max_features` features at each split.\n",
      " |      - If float, then `max_features` is a fraction and\n",
      " |        `int(max_features * n_features)` features are considered at each\n",
      " |        split.\n",
      " |      - If \"auto\", then `max_features=sqrt(n_features)`.\n",
      " |      - If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n",
      " |      - If \"log2\", then `max_features=log2(n_features)`.\n",
      " |      - If None, then `max_features=n_features`.\n",
      " |  \n",
      " |      Note: the search for a split does not stop until at least one\n",
      " |      valid partition of the node samples is found, even if it requires to\n",
      " |      effectively inspect more than ``max_features`` features.\n",
      " |  \n",
      " |  max_leaf_nodes : int, default=None\n",
      " |      Grow trees with ``max_leaf_nodes`` in best-first fashion.\n",
      " |      Best nodes are defined as relative reduction in impurity.\n",
      " |      If None then unlimited number of leaf nodes.\n",
      " |  \n",
      " |  min_impurity_decrease : float, default=0.0\n",
      " |      A node will be split if this split induces a decrease of the impurity\n",
      " |      greater than or equal to this value.\n",
      " |  \n",
      " |      The weighted impurity decrease equation is the following::\n",
      " |  \n",
      " |          N_t / N * (impurity - N_t_R / N_t * right_impurity\n",
      " |                              - N_t_L / N_t * left_impurity)\n",
      " |  \n",
      " |      where ``N`` is the total number of samples, ``N_t`` is the number of\n",
      " |      samples at the current node, ``N_t_L`` is the number of samples in the\n",
      " |      left child, and ``N_t_R`` is the number of samples in the right child.\n",
      " |  \n",
      " |      ``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\n",
      " |      if ``sample_weight`` is passed.\n",
      " |  \n",
      " |      .. versionadded:: 0.19\n",
      " |  \n",
      " |  min_impurity_split : float, default=None\n",
      " |      Threshold for early stopping in tree growth. A node will split\n",
      " |      if its impurity is above the threshold, otherwise it is a leaf.\n",
      " |  \n",
      " |      .. deprecated:: 0.19\n",
      " |         ``min_impurity_split`` has been deprecated in favor of\n",
      " |         ``min_impurity_decrease`` in 0.19. The default value of\n",
      " |         ``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\n",
      " |         will be removed in 0.25. Use ``min_impurity_decrease`` instead.\n",
      " |  \n",
      " |  \n",
      " |  bootstrap : bool, default=True\n",
      " |      Whether bootstrap samples are used when building trees. If False, the\n",
      " |      whole dataset is used to build each tree.\n",
      " |  \n",
      " |  oob_score : bool, default=False\n",
      " |      Whether to use out-of-bag samples to estimate\n",
      " |      the generalization accuracy.\n",
      " |  \n",
      " |  n_jobs : int, default=None\n",
      " |      The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n",
      " |      :meth:`decision_path` and :meth:`apply` are all parallelized over the\n",
      " |      trees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\n",
      " |      context. ``-1`` means using all processors. See :term:`Glossary\n",
      " |      <n_jobs>` for more details.\n",
      " |  \n",
      " |  random_state : int or RandomState, default=None\n",
      " |      Controls both the randomness of the bootstrapping of the samples used\n",
      " |      when building trees (if ``bootstrap=True``) and the sampling of the\n",
      " |      features to consider when looking for the best split at each node\n",
      " |      (if ``max_features < n_features``).\n",
      " |      See :term:`Glossary <random_state>` for details.\n",
      " |  \n",
      " |  verbose : int, default=0\n",
      " |      Controls the verbosity when fitting and predicting.\n",
      " |  \n",
      " |  warm_start : bool, default=False\n",
      " |      When set to ``True``, reuse the solution of the previous call to fit\n",
      " |      and add more estimators to the ensemble, otherwise, just fit a whole\n",
      " |      new forest. See :term:`the Glossary <warm_start>`.\n",
      " |  \n",
      " |  class_weight : {\"balanced\", \"balanced_subsample\"}, dict or list of dicts,             default=None\n",
      " |      Weights associated with classes in the form ``{class_label: weight}``.\n",
      " |      If not given, all classes are supposed to have weight one. For\n",
      " |      multi-output problems, a list of dicts can be provided in the same\n",
      " |      order as the columns of y.\n",
      " |  \n",
      " |      Note that for multioutput (including multilabel) weights should be\n",
      " |      defined for each class of every column in its own dict. For example,\n",
      " |      for four-class multilabel classification weights should be\n",
      " |      [{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n",
      " |      [{1:1}, {2:5}, {3:1}, {4:1}].\n",
      " |  \n",
      " |      The \"balanced\" mode uses the values of y to automatically adjust\n",
      " |      weights inversely proportional to class frequencies in the input data\n",
      " |      as ``n_samples / (n_classes * np.bincount(y))``\n",
      " |  \n",
      " |      The \"balanced_subsample\" mode is the same as \"balanced\" except that\n",
      " |      weights are computed based on the bootstrap sample for every tree\n",
      " |      grown.\n",
      " |  \n",
      " |      For multi-output, the weights of each column of y will be multiplied.\n",
      " |  \n",
      " |      Note that these weights will be multiplied with sample_weight (passed\n",
      " |      through the fit method) if sample_weight is specified.\n",
      " |  \n",
      " |  ccp_alpha : non-negative float, default=0.0\n",
      " |      Complexity parameter used for Minimal Cost-Complexity Pruning. The\n",
      " |      subtree with the largest cost complexity that is smaller than\n",
      " |      ``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n",
      " |      :ref:`minimal_cost_complexity_pruning` for details.\n",
      " |  \n",
      " |      .. versionadded:: 0.22\n",
      " |  \n",
      " |  max_samples : int or float, default=None\n",
      " |      If bootstrap is True, the number of samples to draw from X\n",
      " |      to train each base estimator.\n",
      " |  \n",
      " |      - If None (default), then draw `X.shape[0]` samples.\n",
      " |      - If int, then draw `max_samples` samples.\n",
      " |      - If float, then draw `max_samples * X.shape[0]` samples. Thus,\n",
      " |        `max_samples` should be in the interval `(0, 1)`.\n",
      " |  \n",
      " |      .. versionadded:: 0.22\n",
      " |  \n",
      " |  Attributes\n",
      " |  ----------\n",
      " |  base_estimator_ : DecisionTreeClassifier\n",
      " |      The child estimator template used to create the collection of fitted\n",
      " |      sub-estimators.\n",
      " |  \n",
      " |  estimators_ : list of DecisionTreeClassifier\n",
      " |      The collection of fitted sub-estimators.\n",
      " |  \n",
      " |  classes_ : ndarray of shape (n_classes,) or a list of such arrays\n",
      " |      The classes labels (single output problem), or a list of arrays of\n",
      " |      class labels (multi-output problem).\n",
      " |  \n",
      " |  n_classes_ : int or list\n",
      " |      The number of classes (single output problem), or a list containing the\n",
      " |      number of classes for each output (multi-output problem).\n",
      " |  \n",
      " |  n_features_ : int\n",
      " |      The number of features when ``fit`` is performed.\n",
      " |  \n",
      " |  n_outputs_ : int\n",
      " |      The number of outputs when ``fit`` is performed.\n",
      " |  \n",
      " |  feature_importances_ : ndarray of shape (n_features,)\n",
      " |      The impurity-based feature importances.\n",
      " |      The higher, the more important the feature.\n",
      " |      The importance of a feature is computed as the (normalized)\n",
      " |      total reduction of the criterion brought by that feature.  It is also\n",
      " |      known as the Gini importance.\n",
      " |  \n",
      " |      Warning: impurity-based feature importances can be misleading for\n",
      " |      high cardinality features (many unique values). See\n",
      " |      :func:`sklearn.inspection.permutation_importance` as an alternative.\n",
      " |  \n",
      " |  oob_score_ : float\n",
      " |      Score of the training dataset obtained using an out-of-bag estimate.\n",
      " |      This attribute exists only when ``oob_score`` is True.\n",
      " |  \n",
      " |  oob_decision_function_ : ndarray of shape (n_samples, n_classes)\n",
      " |      Decision function computed with out-of-bag estimate on the training\n",
      " |      set. If n_estimators is small it might be possible that a data point\n",
      " |      was never left out during the bootstrap. In this case,\n",
      " |      `oob_decision_function_` might contain NaN. This attribute exists\n",
      " |      only when ``oob_score`` is True.\n",
      " |  \n",
      " |  See Also\n",
      " |  --------\n",
      " |  DecisionTreeClassifier, ExtraTreesClassifier\n",
      " |  \n",
      " |  Notes\n",
      " |  -----\n",
      " |  The default values for the parameters controlling the size of the trees\n",
      " |  (e.g. ``max_depth``, ``min_samples_leaf``, etc.) lead to fully grown and\n",
      " |  unpruned trees which can potentially be very large on some data sets. To\n",
      " |  reduce memory consumption, the complexity and size of the trees should be\n",
      " |  controlled by setting those parameter values.\n",
      " |  \n",
      " |  The features are always randomly permuted at each split. Therefore,\n",
      " |  the best found split may vary, even with the same training data,\n",
      " |  ``max_features=n_features`` and ``bootstrap=False``, if the improvement\n",
      " |  of the criterion is identical for several splits enumerated during the\n",
      " |  search of the best split. To obtain a deterministic behaviour during\n",
      " |  fitting, ``random_state`` has to be fixed.\n",
      " |  \n",
      " |  References\n",
      " |  ----------\n",
      " |  .. [1] L. Breiman, \"Random Forests\", Machine Learning, 45(1), 5-32, 2001.\n",
      " |  \n",
      " |  Examples\n",
      " |  --------\n",
      " |  >>> from sklearn.ensemble import RandomForestClassifier\n",
      " |  >>> from sklearn.datasets import make_classification\n",
      " |  >>> X, y = make_classification(n_samples=1000, n_features=4,\n",
      " |  ...                            n_informative=2, n_redundant=0,\n",
      " |  ...                            random_state=0, shuffle=False)\n",
      " |  >>> clf = RandomForestClassifier(max_depth=2, random_state=0)\n",
      " |  >>> clf.fit(X, y)\n",
      " |  RandomForestClassifier(...)\n",
      " |  >>> print(clf.predict([[0, 0, 0, 0]]))\n",
      " |  [1]\n",
      " |  \n",
      " |  Method resolution order:\n",
      " |      RandomForestClassifier\n",
      " |      ForestClassifier\n",
      " |      sklearn.base.ClassifierMixin\n",
      " |      BaseForest\n",
      " |      sklearn.base.MultiOutputMixin\n",
      " |      sklearn.ensemble._base.BaseEnsemble\n",
      " |      sklearn.base.MetaEstimatorMixin\n",
      " |      sklearn.base.BaseEstimator\n",
      " |      builtins.object\n",
      " |  \n",
      " |  Methods defined here:\n",
      " |  \n",
      " |  __init__(self, n_estimators=100, *, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, bootstrap=True, oob_score=False, n_jobs=None, random_state=None, verbose=0, warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None)\n",
      " |      Initialize self.  See help(type(self)) for accurate signature.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Data and other attributes defined here:\n",
      " |  \n",
      " |  __abstractmethods__ = frozenset()\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Methods inherited from ForestClassifier:\n",
      " |  \n",
      " |  predict(self, X)\n",
      " |      Predict class for X.\n",
      " |      \n",
      " |      The predicted class of an input sample is a vote by the trees in\n",
      " |      the forest, weighted by their probability estimates. That is,\n",
      " |      the predicted class is the one with highest mean probability\n",
      " |      estimate across the trees.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The input samples. Internally, its dtype will be converted to\n",
      " |          ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csr_matrix``.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      y : ndarray of shape (n_samples,) or (n_samples, n_outputs)\n",
      " |          The predicted classes.\n",
      " |  \n",
      " |  predict_log_proba(self, X)\n",
      " |      Predict class log-probabilities for X.\n",
      " |      \n",
      " |      The predicted class log-probabilities of an input sample is computed as\n",
      " |      the log of the mean predicted class probabilities of the trees in the\n",
      " |      forest.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The input samples. Internally, its dtype will be converted to\n",
      " |          ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csr_matrix``.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n",
      " |          such arrays if n_outputs > 1.\n",
      " |          The class probabilities of the input samples. The order of the\n",
      " |          classes corresponds to that in the attribute :term:`classes_`.\n",
      " |  \n",
      " |  predict_proba(self, X)\n",
      " |      Predict class probabilities for X.\n",
      " |      \n",
      " |      The predicted class probabilities of an input sample are computed as\n",
      " |      the mean predicted class probabilities of the trees in the forest.\n",
      " |      The class probability of a single tree is the fraction of samples of\n",
      " |      the same class in a leaf.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The input samples. Internally, its dtype will be converted to\n",
      " |          ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csr_matrix``.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      p : ndarray of shape (n_samples, n_classes), or a list of n_outputs\n",
      " |          such arrays if n_outputs > 1.\n",
      " |          The class probabilities of the input samples. The order of the\n",
      " |          classes corresponds to that in the attribute :term:`classes_`.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Methods inherited from sklearn.base.ClassifierMixin:\n",
      " |  \n",
      " |  score(self, X, y, sample_weight=None)\n",
      " |      Return the mean accuracy on the given test data and labels.\n",
      " |      \n",
      " |      In multi-label classification, this is the subset accuracy\n",
      " |      which is a harsh metric since you require for each sample that\n",
      " |      each label set be correctly predicted.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : array-like of shape (n_samples, n_features)\n",
      " |          Test samples.\n",
      " |      \n",
      " |      y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n",
      " |          True labels for X.\n",
      " |      \n",
      " |      sample_weight : array-like of shape (n_samples,), default=None\n",
      " |          Sample weights.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      score : float\n",
      " |          Mean accuracy of self.predict(X) wrt. y.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Data descriptors inherited from sklearn.base.ClassifierMixin:\n",
      " |  \n",
      " |  __dict__\n",
      " |      dictionary for instance variables (if defined)\n",
      " |  \n",
      " |  __weakref__\n",
      " |      list of weak references to the object (if defined)\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Methods inherited from BaseForest:\n",
      " |  \n",
      " |  apply(self, X)\n",
      " |      Apply trees in the forest to X, return leaf indices.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The input samples. Internally, its dtype will be converted to\n",
      " |          ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csr_matrix``.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      X_leaves : ndarray of shape (n_samples, n_estimators)\n",
      " |          For each datapoint x in X and for each tree in the forest,\n",
      " |          return the index of the leaf x ends up in.\n",
      " |  \n",
      " |  decision_path(self, X)\n",
      " |      Return the decision path in the forest.\n",
      " |      \n",
      " |      .. versionadded:: 0.18\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The input samples. Internally, its dtype will be converted to\n",
      " |          ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csr_matrix``.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      indicator : sparse matrix of shape (n_samples, n_nodes)\n",
      " |          Return a node indicator matrix where non zero elements indicates\n",
      " |          that the samples goes through the nodes. The matrix is of CSR\n",
      " |          format.\n",
      " |      \n",
      " |      n_nodes_ptr : ndarray of shape (n_estimators + 1,)\n",
      " |          The columns from indicator[n_nodes_ptr[i]:n_nodes_ptr[i+1]]\n",
      " |          gives the indicator value for the i-th estimator.\n",
      " |  \n",
      " |  fit(self, X, y, sample_weight=None)\n",
      " |      Build a forest of trees from the training set (X, y).\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      X : {array-like, sparse matrix} of shape (n_samples, n_features)\n",
      " |          The training input samples. Internally, its dtype will be converted\n",
      " |          to ``dtype=np.float32``. If a sparse matrix is provided, it will be\n",
      " |          converted into a sparse ``csc_matrix``.\n",
      " |      \n",
      " |      y : array-like of shape (n_samples,) or (n_samples, n_outputs)\n",
      " |          The target values (class labels in classification, real numbers in\n",
      " |          regression).\n",
      " |      \n",
      " |      sample_weight : array-like of shape (n_samples,), default=None\n",
      " |          Sample weights. If None, then samples are equally weighted. Splits\n",
      " |          that would create child nodes with net zero or negative weight are\n",
      " |          ignored while searching for a split in each node. In the case of\n",
      " |          classification, splits are also ignored if they would result in any\n",
      " |          single class carrying a negative weight in either child node.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      self : object\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Readonly properties inherited from BaseForest:\n",
      " |  \n",
      " |  feature_importances_\n",
      " |      The impurity-based feature importances.\n",
      " |      \n",
      " |      The higher, the more important the feature.\n",
      " |      The importance of a feature is computed as the (normalized)\n",
      " |      total reduction of the criterion brought by that feature.  It is also\n",
      " |      known as the Gini importance.\n",
      " |      \n",
      " |      Warning: impurity-based feature importances can be misleading for\n",
      " |      high cardinality features (many unique values). See\n",
      " |      :func:`sklearn.inspection.permutation_importance` as an alternative.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      feature_importances_ : ndarray of shape (n_features,)\n",
      " |          The values of this array sum to 1, unless all trees are single node\n",
      " |          trees consisting of only the root node, in which case it will be an\n",
      " |          array of zeros.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Methods inherited from sklearn.ensemble._base.BaseEnsemble:\n",
      " |  \n",
      " |  __getitem__(self, index)\n",
      " |      Return the index'th estimator in the ensemble.\n",
      " |  \n",
      " |  __iter__(self)\n",
      " |      Return iterator over estimators in the ensemble.\n",
      " |  \n",
      " |  __len__(self)\n",
      " |      Return the number of estimators in the ensemble.\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Data and other attributes inherited from sklearn.ensemble._base.BaseEnsemble:\n",
      " |  \n",
      " |  __annotations__ = {'_required_parameters': typing.List[str]}\n",
      " |  \n",
      " |  ----------------------------------------------------------------------\n",
      " |  Methods inherited from sklearn.base.BaseEstimator:\n",
      " |  \n",
      " |  __getstate__(self)\n",
      " |  \n",
      " |  __repr__(self, N_CHAR_MAX=700)\n",
      " |      Return repr(self).\n",
      " |  \n",
      " |  __setstate__(self, state)\n",
      " |  \n",
      " |  get_params(self, deep=True)\n",
      " |      Get parameters for this estimator.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      deep : bool, default=True\n",
      " |          If True, will return the parameters for this estimator and\n",
      " |          contained subobjects that are estimators.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      params : mapping of string to any\n",
      " |          Parameter names mapped to their values.\n",
      " |  \n",
      " |  set_params(self, **params)\n",
      " |      Set the parameters of this estimator.\n",
      " |      \n",
      " |      The method works on simple estimators as well as on nested objects\n",
      " |      (such as pipelines). The latter have parameters of the form\n",
      " |      ``<component>__<parameter>`` so that it's possible to update each\n",
      " |      component of a nested object.\n",
      " |      \n",
      " |      Parameters\n",
      " |      ----------\n",
      " |      **params : dict\n",
      " |          Estimator parameters.\n",
      " |      \n",
      " |      Returns\n",
      " |      -------\n",
      " |      self : object\n",
      " |          Estimator instance.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "help(RandomForestClassifier)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 3.构建随机森林回归模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier(n_estimators=50)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rf = RandomForestClassifier(n_estimators=50)\n",
    "rf = rf.fit(feature_data, target_data)\n",
    "rf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 4.模型预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0, 0])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res = rf.predict([[5.1, 3.5, 1.4, 0.2],[4.9, 3. , 1.4, 0.2]])\n",
    "res"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
